# Utility for splitting a dataset into a training set and a test set
import numpy as np

"""
X 原始数据集中的特征矩阵
y 原始数据集中的标签向量
test_radio 测试数据集的占比 默认0.2
seed 随机种子
"""

def train_test_split(X, y, test_radio=0.2, seed=None):
    """Randomly split features and labels into training and test subsets.

    The sample indexes are shuffled, the first ``int(len(X) * test_radio)``
    shuffled indexes form the test set and the rest form the training set.
    ``X`` and ``y`` are indexed with the same index arrays, so every row
    stays paired with its label.

    Args:
        X: Feature matrix; must support ``len`` and indexing with an
            integer index array (e.g. a NumPy array).
        y: Label vector with the same number of samples as ``X``.
        test_radio: Fraction of samples assigned to the test set, in the
            closed interval [0, 1]. Defaults to 0.2.
        seed: Optional seed passed to ``np.random.seed`` to make the split
            reproducible. ``None`` leaves the global random state untouched.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or ``test_radio``
            lies outside [0, 1].
    """
    n_samples = len(X)
    if n_samples != len(y):
        raise ValueError(
            "X and y must contain the same number of samples, "
            f"got {n_samples} and {len(y)}"
        )
    if not 0.0 <= test_radio <= 1.0:
        raise ValueError(
            f"test_radio must be between 0 and 1, got {test_radio}"
        )

    # Compare against None explicitly: a plain truthiness test would
    # silently ignore the perfectly valid seed 0.
    if seed is not None:
        np.random.seed(seed)

    shuffle_indexes = np.random.permutation(n_samples)
    test_size = int(n_samples * test_radio)
    test_indexes = shuffle_indexes[:test_size]
    train_indexes = shuffle_indexes[test_size:]

    X_train = X[train_indexes]
    y_train = y[train_indexes]
    X_test = X[test_indexes]
    y_test = y[test_indexes]
    return X_train, X_test, y_train, y_test
